* ************************* *
* CREATE ANALYSIS VARIABLES *
* ************************* *

/*

Disclaimer:

Any views expressed are those of the authors and not those of the U.S. Census Bureau. The Census Bureau 
has reviewed this data product to ensure appropriate access, use, and disclosure avoidance protection of
the confidential source data used to produce this product. This research was performed at a Federal 
Statistical Research Data Center under FSRDC Project Number 2896. (CBDRB-FY25-P2896-R11993)


Note to users:

The U.S. Census Bureau prohibits the release of replication data but permits the release of replication
code so long as that code does not contain data, including hardcoded values that rely on restricted
information. We have therefore redacted our code accordingly. Users who have been granted access to 
the data at a Federal Statistical Research Data Center can consult the documentation that accompanies the
datasets to recover the hardcoded values that were redacted below.

*/

* ---------------------------------------------------------------------------- *

* ------------- *
* PRELIMINARIES * 
* ------------- *

* Start from an empty dataset and make the run non-interactive
clear 

set more off
set seed [redacted]

* Move to the project root FIRST. Every path used below -- including the log
* file -- is relative, so they must all resolve against the same directory
* regardless of where Stata was launched from. (Previously the log was opened
* before the cd, so its location depended on the launch directory.)
cd "/projects/"

* Project folders (relative to the project root) and the restricted-data folder
global data "data/"
global code "code/"
global output "output/"
global decennial "[redacted]"

* Close any log left open by an earlier run, then open this run's log
cap log close
log using "logs/creation", replace



* ---------------------------------------------------------------------------- *

* ------- *
* GLOBALS * 
* ------- *

* States

* Two-letter codes of every source file to process, in processing order
* (built up in four chunks of 13 codes each)
global states "ak al ar az ca co ct dc de fl ga hi ia"
global states "${states} id il in ks ky la ma md me mi mn mo ms"
global states "${states} mt nc nd ne nh nj nm nv ny oh ok or pa"
global states "${states} pr ri sc sd tn tx ut va vt wa wi wv wy"


	

* ---------------------------------------------------------------------------- *

* -------------------------------- *
* NEED TO LOOP THROUGH EVERY STATE *
* -------------------------------- *

* Everything up to the matching closing brace runs once per entry of ${states};
* `s' holds the current two-letter code.
foreach s of global states {

* ---------------------- *
* Open and organize data *
* ---------------------- * 

* Load census files and keep necessary vars only.
* Each name is listed exactly once (qmilad used to appear twice on the
* military-service line).
import sas ///
	puid pseq pnc 						/// 
	pwt bcseq hseq rcseq 					/// 
	qmilad qmil5 qmiltot 					/// 
	qsex qage qdb qrel msp					/// 
	qspan spanlong 						/// 
	qracex imprace racew raceb raceaian raceasian racenhpi racesor cenrace	/// 
	qpobst qyr2us qancescode1 qancescode2 anc1long anc2long	/// 
	qcitizen qspeak qlangcode qengabil qms 			/// 
	qhigh qincwg qincse esr					/// 
	qsense qlmob qabmen qabphys qabgo qabwork 		/// 
	using "${decennial}cen2000sedf_`s'_03.sas7bdat", clear

	
* Convert string census fields to numeric
* ---------------------------------------

* fields copied into a new numeric variable with a shorter name
destring qpobst,   generate(bpl)
destring qsex,     generate(sex)
destring qyr2us,   generate(yr2us)
destring qcitizen, generate(citizen)

* date of birth: cut qdb into year (chars 1-4), month (5-6) and day (7-8)
local pos_year  1
local len_year  4
local pos_month 5
local len_month 2
local pos_day   7
local len_day   2

foreach part in year month day {
	generate birth_`part' = substr(qdb, `pos_`part'', `len_`part'')
	destring birth_`part', replace
	label variable birth_`part' "Birth `part'"
}

* fields that keep their original name and get a numeric twin suffixed _num
foreach v in qancescode1 qancescode2 imprace spanlong {
	destring `v', generate(`v'_num)
}

* converted in place
destring qhigh, replace


* Binary race indicator
* ---------------------

* 1 when cenrace equals the code below, 0 otherwise
generate white = (cenrace == [redacted])

label define white 0 "Not white" 1 "White"
label values white white
label variable white "White"


* Binary Western-origin indicator
* -------------------------------

generate origin = 0
label variable origin "From a Western country"

* switch the flag on for each Western birthplace code or code range ...
replace origin = 1 if inrange(bpl, [redacted], [redacted]) // Europe
replace origin = 1 if bpl == [redacted] // Israel
replace origin = 1 if inrange(bpl, [redacted], [redacted]) // Canada, Greenland, St. Pierre/Miquelon
replace origin = 1 if bpl == [redacted] // Australia
replace origin = 1 if bpl == [redacted] // New Zealand 

* ... then switch it back off for Mexico (must stay after the lines above)
replace origin = 0 if bpl == [redacted] // turn off Mexico



* Born in an exclusively English-speaking country
* -----------------------------------------------

generate engl_exclusive = 0
label variable engl_exclusive "Born in an exclusively English-speaking country"

replace engl_exclusive = 1 if inrange(bpl, [redacted], [redacted]) // UK, UK regions, dependencies
replace engl_exclusive = 1 if bpl == [redacted] // Gibraltar
replace engl_exclusive = 1 if bpl == [redacted] // Ireland
replace engl_exclusive = 1 if bpl == [redacted] // Canada
replace engl_exclusive = 1 if bpl == [redacted] // Australia
replace engl_exclusive = 1 if bpl == [redacted] // New Zealand


* Hispanic origin, rebuilt to mirror the IPUMS coding
* ---------------------------------------------------

* stays missing for any spanlong_num code not matched below
generate hispanic = .

replace hispanic = 0 if spanlong_num == [redacted] // not Hispanic
replace hispanic = 1 if spanlong_num == [redacted] // Mexican 
replace hispanic = 2 if spanlong_num == [redacted] // Puerto Rican
replace hispanic = 3 if spanlong_num == [redacted] // Cuban
replace hispanic = 4 if inrange(spanlong_num, [redacted], [redacted]) // other
replace hispanic = 5 if spanlong_num == [redacted] // no response

label variable hispanic "Hispanic"


* Tag every record with the file it came from
* -------------------------------------------

generate sourcefile = "`s'"
label variable sourcefile "Source file"
order sourcefile, first	


* ---------------------------------------------------------------------------- * 
	
* --------------------------------------- *	
* CREATE A TARGET VARIABLE FOR THE SAMPLE *
* --------------------------------------- *

* Create the variable (1 = in the target sample, missing = not)
cap drop target
gen target = .

label variable target "Draft-eligible immigrant"


* Tag men born abroad within the target birth year range.
* Stata orders missing values above every number, so the open-ended test
* `bpl >= x' is also true when bpl is missing; the explicit !missing(bpl)
* guard keeps records with an unknown birthplace out of the sample.
replace target = 1 if sex == [redacted] & bpl >= [redacted] & !missing(bpl) ///
	& birth_year >= 1948 & birth_year <= 1953	
	
	
* De-tag individuals who immigrated after the draft.
* Note: a missing yr2us also satisfies `yr2us >= year', so records with an
* unknown year of entry are de-tagged here as well.
replace target = . if (birth_year >= 1948 & birth_year <= 1950) & yr2us >= 1970 // 1948-1950 cohort
replace target = . if birth_year == 1951 & yr2us >= 1971 // 1951 cohort
replace target = . if birth_year == 1952 & yr2us >= 1972 // 1952 cohort
replace target = . if birth_year == 1953 & yr2us >= 1973 // 1953 cohort


* De-tag implausible values (reported entry year earlier than birth year)
replace target = . if yr2us < birth_year
	

* De-tag individuals born abroad to American parents
replace target = . if target == 1 & citizen == [redacted] 	
	
	

* ---------------------------------------------------------------------------- *	

* ---------------------- *
* SAVE INTERMEDIATE FILE *
* ---------------------- *

* this file has everyone from state s in it!
save "${data}intermediate/rt3_intermediate_`s'.dta", replace


* ---------------------------------------------------------------------------- * 

* -------------------------------------- *	
* RESIDENTIAL INTEGRATION PRE-PROCESSING *
* -------------------------------------- *	
	
* import the spatial unit data from RT1	
* -------------------------------------

* Read the RT1 file through ${decennial}, the same global used for the RT3
* import, instead of a hardcoded absolute path, so the data location is
* configured in one place.
* NOTE(review): this assumes the _01 and _03 record-type files live in the
* same folder -- confirm against the value of ${decennial}.
import sas ///
	bcseq rcseq state county tract block blkgrp ///
	using "${decennial}cen2000sedf_`s'_01.sas7bdat", clear
	
save "${data}census_rt1/rt1_`s'.dta", replace


* merge in the spatial unit data from RT1
* ---------------------------------------

* start with RT3 intermediate file
use "${data}intermediate/rt3_intermediate_`s'.dta", clear


* need to use bcseq to link RT1 geography files to RT3 person files
* (many person records per bcseq on the RT3 side, one RT1 record per bcseq)
merge m:1 bcseq using "${data}census_rt1/rt1_`s'.dta"


* check matching report, keep matches, then drop _merge
keep if _m == 3	
drop _m


* reorder the spatial variables
order state county tract block blkgrp, after(pnc)


* save RT3-RT1 state-specific merged file
* ---------------------------------------

* this file includes the household info merged to the individual info for state s
save "${data}intermediate/rt3_rt1_`s'.dta", replace


* birthplace shares by tract and by block group
* ---------------------------------------------

* The same recipe runs at both geographic levels; only the variables that
* identify one unit inside the state file differ.
foreach lvl in tract blkgrp {

	if "`lvl'" == "tract" {
		local unit county tract
	}
	else {
		local unit county tract blkgrp
	}

	* every pass starts from the RT3-RT1 state-specific merged file
	use "${data}intermediate/rt3_rt1_`s'.dta", clear

	* persons (non-missing pnc) in the unit, then in the unit-by-birthplace cell
	bysort `unit': egen `lvl'_pop = count(pnc)
	bysort `unit' bpl: egen bpl_`lvl'_pop = count(pnc)

	* share of the unit's population born in each birthplace, in percent
	gen bpl_`lvl'_percent = (bpl_`lvl'_pop / `lvl'_pop) * 100

	* collapse to one row per unit-by-birthplace cell (long file)
	keep `unit' bpl `lvl'_pop bpl_`lvl'_pop bpl_`lvl'_percent
	duplicates drop `unit' bpl `lvl'_pop bpl_`lvl'_pop bpl_`lvl'_percent, force

	* sanity check: the shares within a unit should add up to 100
	bysort `unit': egen check = total(bpl_`lvl'_percent)
	tab check
	drop check

	* one long file per level and state
	save "${data}bpl/`lvl'_bpl_`s'.dta", replace
}


	
* ---------------------------------------------------------------------------- * 

* ---------------------------- *	
* NATIVE SPOUSE PRE-PROCESSING *
* ---------------------------- *

* Purpose of this section: restrict the state file to households that contain
* a tagged (target) person, flag married targets, give household members a
* positional id, and create empty spouse_* variables that are filled in by
* the two loops that follow.

* start with RT3 intermediate file with everyone in it
use "${data}intermediate/rt3_intermediate_`s'.dta", clear

	
* Drop households without tagged individuals
* (count() counts non-missing values, so temphh == 0 marks puid groups in
*  which nobody has target == 1)
egen temphh = count(target), by(puid) 
	// saves computing power later
	
drop if temphh == 0
drop temphh


* Grab spouse info
* ---------------- 

* Tag target individuals who are also married
* (marriedtarget copies target, i.e. 1 or missing, for records whose qms
*  equals the code below)
cap drop marriedtarget
gen marriedtarget = .
label variable marriedtarget "Target individuals who are married"

replace marriedtarget = target if qms == [redacted]


* Count the number of married target individuals in the HH
cap drop temphhmar
egen temphhmar = count(marriedtarget), by(puid) 

tab temphhmar if marriedtarget == 1
	// in ipums, 99.8% of targets live in households with only one married target
	// to make our lives easier, let's just ignore HHs with more than one married target
replace marriedtarget = . if temphhmar != 1


tab qrel if marriedtarget == 1
	// in ipums, 86.5% of married targets are listed as the HH head
	// in ipums, 10% are listed as the spouse


* Give a unique id to everyone (pnc) who lives in a HH (puid) with exactly 1 married target
* tempid is the observation number (_n) after the gsort below, which puts the
* tempsort == 1 records first. The loops further down use tempid as an
* observation subscript (`var'[`wifeid']), so they rely on the sort order
* staying exactly as it is here.
cap drop tempsort
gen tempsort = temphhmar == 1

gsort -tempsort puid pnc 
cap drop tempid
gen tempid = _n if tempsort == 1

drop temphhmar tempsort 
	// drop the variable which keeps track of how many married targets there are in the HH

	
* create spouse variables
* (all start out missing; records still missing spouse_id after both loops
*  are dropped before the spouse file is saved)
cap drop spouse*
gen spouse_id = .
label variable spouse_id "Spouse id"

gen spouse_bpl = .
label variable spouse_bpl "Spouse birthplace"

gen spouse_white = .
label variable spouse_white "White spouse"

gen spouse_hispanic = .	
label variable spouse_hispanic "Hispanic spouse"


* fill in info of spouse for married targets who are HH heads
* -----------------------------------------------------------
* For every married target with the qrel code tested below (HH head, per the
* section heading) whose household has exactly one record with the spouse
* code, copy that record's tempid, bpl, white and hispanic into the target's
* spouse_* variables.

* make sure that a spouse is present in the HH
* (tempspouse_inhh = number of records in the puid group with the qrel code below)
cap drop tempspouse*
gen tempspouse = 1 if qrel == [redacted]
egen tempspouse_inhh = count(tempspouse), by(puid)


* assign an index to all married targets, HH heads, with spouse present
* (temptagg runs 1, 2, 3, ... over the qualifying records and is missing elsewhere)
cap drop temptagg
egen temptagg = seq() if marriedtarget == 1 & qrel == [redacted] & tempspouse_inhh == 1

drop tempspouse*

* numeric copy of puid so the household id can be passed through r(mean)
cap drop temp_puid
destring puid, gen(temp_puid)

* count_valid holds the same number in every row: how many records got an index
cap drop count_valid
egen count_valid = count(temptagg)

* leaves r(max) (the highest index) behind for the forvalues range below
su temptagg

* `if' used as a programming command evaluates count_valid in the first
* observation only, which is enough because the variable is constant
if count_valid > 0 { // can also use: if `r(N)'> 0 { 

* the range is expanded once, before the summarize calls inside the loop
* overwrite r()
forvalues i = 1 / `r(max)' {
	
	// store the hh number as a local
	qui su temp_puid if temptagg == `i'
	local hhid = `r(mean)'					

	// store the tempid of the spouse as a local if a spouse is present in the HH 
	// (r(mean) over the single matching record is that record's tempid)
	qui su tempid if temp_puid == `hhid' & qrel == [redacted]
	local wifeid = `r(mean)'
	
	qui replace spouse_id = `wifeid' if temptagg == `i'

	// assign spouse values to HHhead
	// (`var'[`wifeid'] reads observation number wifeid, so this relies on
	//  tempid still matching the observation number)
	foreach var of varlist bpl white hispanic {
		qui replace spouse_`var' = `var'[`wifeid'] if temptagg == `i'
	}
}
}


* fill in info of spouse for married targets who are themselves spouses
* ---------------------------------------------------------------------
* Same lookup as the previous loop, for married targets carrying the qrel
* code tested below; afterwards the file is cut down to targets with an
* identified partner and saved.

// assign an index to all married targets who are themselves listed as spouses
// (per the section heading; the qrel code itself is redacted)
// NOTE(review): unlike the previous loop there is no check here on how many
// partner records the household contains -- confirm that is intended
cap drop temptagg
egen temptagg = seq() if marriedtarget == 1 & qrel == [redacted]

cap drop count_valid
egen count_valid = count(temptagg)

* leaves r(max) behind for the forvalues range below
su temptagg

* evaluated on the first observation; count_valid is constant across rows
if count_valid > 0 { // can also use: if `r(N)'> 0 { 
	
forvalues i = 1 / `r(max)' {	
	
	// store the hh number as a local
	qui su temp_puid if temptagg == `i'
	local hhid = `r(mean)'					

	// store the tempid of the target's partner as a local if that person is present in the HH 
	// (presumably the HH head in this section -- confirm against the redacted qrel code)
	qui su tempid if temp_puid == `hhid' & qrel == [redacted]
	local wifeid = `r(mean)'
	
	qui replace spouse_id = `wifeid' if temptagg == `i'

	
	// assign spouse values to target
	// (`var'[`wifeid'] reads observation number wifeid)
	foreach var of varlist bpl white hispanic {
		qui replace spouse_`var' = `var'[`wifeid'] if temptagg == `i'
	}
}
}




* drop all of the temp variables
* ------------------------------
cap drop tempid temptagg


* drop everyone for whom spousal variables are missing
* ----------------------------------------------------
* (spouse_id is only filled by the two loops above)
drop if spouse_id == .


* check to make sure data includes only married targets who are HH heads or spouses
* ---------------------------------------------------------------------------------
tab qms
tab qrel


* save the spouse file
* --------------------
save "${data}spouse/spouse_`s'.dta", replace


* ---------------------------------------------------------------------------- * 

* -------------------- *	
* STRUCTURE THE SAMPLE *
* -------------------- *

* Start again with the RT3-RT1 merged file for state s (person records plus
* the county/tract/block-group identifiers needed for the residential merges)
use "${data}intermediate/rt3_rt1_`s'.dta", clear


* Keep only target individuals: men born 1948-1953, immigrated before draft
keep if target == 1



* ---------------------------------------------------------------------------- * 

* ------------------------------ *
* MERGE IN RSN, RES, SPOUSE DATA *
* ------------------------------ *

* Lottery
* -------
merge m:1 birth_year birth_month birth_day using "${data}lottery/rsns.dta"
	// _m == 2 are birthdays without target individuals in the sample
	// _m == 1 (targets whose birth date is not in the lottery file) are dropped too
keep if _m == 3
drop _m


* Residential vars
* ----------------
* ${data} already ends in a slash, so the using paths are written as
* "${data}bpl/..." -- the same form used when these files were saved --
* rather than "${data}/bpl/...", which expanded to "data//bpl/...".

* Merge in birthplace percentages by tract and print results
merge m:1 county tract bpl using "${data}bpl/tract_bpl_`s'.dta"
tab _m
drop if _m == 2 // drop all tract-bpl combos not mergeable to a tract
drop _m


* Merge in birthplace percentages by blkgrp and print results
merge m:1 county tract blkgrp bpl using "${data}bpl/blkgrp_bpl_`s'.dta"
tab _m
drop if _m == 2 // drop all blkgrp-bpl combos not mergeable to a blkgrp
drop _m


* Label variables
label variable tract_pop "Tract population"
label variable bpl_tract_pop "Count co-nationals in tract"
label variable bpl_tract_percent "Percent co-nationals in tract"
label variable blkgrp_pop "Block group population"
label variable bpl_blkgrp_pop "Count co-nationals in block group"
label variable bpl_blkgrp_percent "Percent co-nationals in block group"


* Spouse
* ------

* The spouse file was built from the full RT3 intermediate file, so it can
* contain married targets that have since left the sample (no RT1 match, or
* no lottery match on birth date). Without the drop below, those using-only
* records would be added back as new rows with missing rsn/apn and geography.
merge 1:1 puid pseq pnc using "${data}spouse/spouse_`s'.dta" 
	// _m = 1 people without spouse vars
	// _m = 2 spouse-file records that are not in the sample any more
tab _m 	
drop if _m == 2
drop _m


* ---------------------------------------------------------------------------- * 
	
	
* --------------- *
* SAVE STATE FILE *
* --------------- *

* Save
cd "/projects/"
save "${data}census/`s'.dta", replace

}

* ---------------------------------------------------------------------------- * 

* ----------------------------- *
* APPEND INDIVIDUAL STATE FILES *
* ----------------------------- *

* Stack the per-state files into one dataset. The append stays best-effort
* (a failing file does not stop the run), but a failure is now reported in
* the log instead of being swallowed silently, so a state cannot go missing
* from the national file unnoticed.
clear
foreach s of global states {
	capture append using "${data}census/`s'.dta"
	if _rc {
		display as error "WARNING: could not append ${data}census/`s'.dta (return code " _rc ")"
	}
}

* ---------------------------------------------------------------------------- * 

* ----------------------------- *
* SAVE INTERMEDIATE MERGED FILE *
* ----------------------------- *

save "${data}intermediate/allstates_intermediate.dta", replace

* ---------------------------------------------------------------------------- *

* --------------------------------- *
* START WITH ALL STATES MERGED FILE *
* --------------------------------- *

* Re-load the file just saved so the variable construction below can be
* re-run from this point without repeating the state loop
use "${data}intermediate/allstates_intermediate.dta", clear


* ---------------------------------------------------------------------------- *

* ---------------------- *	
* VIETNAM VETERAN STATUS *
* ---------------------- *

* veteran: 1 / 0 for the two qmil5 codes below, missing for any other value
generate veteran = .
replace  veteran = 1 if qmil5 == [redacted]
replace  veteran = 0 if qmil5 == [redacted]
label variable veteran "Vietnam veteran"


* ---------------------------------------------------------------------------- * 

* ---------- *
* DRAFT RISK *
* ---------- *

* 1 when the person's rsn is at or below apn, 0 otherwise
generate draft_risk = (rsn <= apn)
label variable draft_risk "Draft risk"


* ---------------------------------------------------------------------------- * 

* ----------------------- *	
* YEARS SINCE IMMIGRATION *
* ----------------------- *

* Census year minus year of entry; smallest value should be 2000 - 1972 = 28
generate yrs_since_immig = 2000 - yr2us
label variable yrs_since_immig "Years since immigration"


* ---------------------------------------------------------------------------- * 

* ------------------ *	
* AGE AT IMMIGRATION *
* ------------------ *

* Year of entry minus year of birth
generate age_immig = yr2us - birth_year
label variable age_immig "Age at immigration"


* ---------------------------------------------------------------------------- * 

* -------------- *	
* NATURALIZATION *
* -------------- *	

* Starts out missing and is filled in from the citizenship code
generate naturalized = .
label variable naturalized "Naturalized"

replace naturalized = 1 if citizen == [redacted] // yes at time of 2000 census
replace naturalized = 0 if citizen == [redacted] // no at time of 2000 census
* born in US or abroad to US parents - shouldn't be in the sample
replace naturalized = . if inlist(citizen, [redacted], [redacted], [redacted])


* ---------------------------------------------------------------------------- *

* ----------------------- *
* RESIDENTIAL INTEGRATION *
* ----------------------- *

* Share (0-1) of the person's tract / block group NOT born in the person's
* own birthplace
generate res_integrate_tract = (100 - bpl_tract_percent)/100
label variable res_integrate_tract "Residential integration (tract)"

generate res_integrate_blkgrp = (100 - bpl_blkgrp_percent)/100
label variable res_integrate_blkgrp "Residential integration (block group)"


* ---------------------------------------------------------------------------- * 

* ------- *	
* ENGLISH *
* ------- *	

* Only speaks English
* -------------------

* 0/1 indicator, left missing for people from exclusively English-speaking
* countries
generate only_engl = (qspeak == [redacted]) if engl_exclusive != 1
label variable only_engl "Only speaks English"

	
* English ability
* ---------------

* Ordered scale 0-4; English-only speakers are ranked above fluent
* multilinguals. Missing = not in universe.
generate engl_ability = .
label variable engl_ability "English ability"

replace engl_ability = 0 if qengabil == [redacted] // not at all
replace engl_ability = 1 if qengabil == [redacted] // not well
replace engl_ability = 2 if qengabil == [redacted] // well
replace engl_ability = 3 if qengabil == [redacted] // very well
replace engl_ability = 4 if qspeak == [redacted] // English only

* blank out people who come from exclusively English-speaking countries
replace engl_ability = . if engl_exclusive == 1
	
	
* English ability, pooled
* -----------------------

* Same scale, except that English-only speakers share the top code (3) with
* "very well". Missing = not in universe.
generate engl_ability_pooled = .
label variable engl_ability_pooled "English ability (pooled)"

replace engl_ability_pooled = 0 if qengabil == [redacted] // not at all
replace engl_ability_pooled = 1 if qengabil == [redacted] // not well
replace engl_ability_pooled = 2 if qengabil == [redacted] // well
replace engl_ability_pooled = 3 if qengabil == [redacted] // very well
replace engl_ability_pooled = 3 if qspeak == [redacted] // English only

* blank out people who come from exclusively English-speaking countries
replace engl_ability_pooled = . if engl_exclusive == 1
	// note that missing = not in universe
	// pool English only speakers with the top coding	
	
	
* ---------------------------------------------------------------------------- * 

* -------------- *
* MARITAL STATUS *
* -------------- *

* 0/1 indicator built from the marital-status code
generate married = (qms == [redacted])
label variable married "Married"

* married people whose spouses cannot be identified leave the sample
drop if married == 1 & spouse_bpl == .

* ---------------------------------------------------------------------------- * 


* ------------- *
* NATIVE SPOUSE *
* ------------- *

* Native-born spouse
* ------------------

* 1 when the spouse's birthplace code lies in the range below, else 0
* (so 0 also covers people with no spouse information).
* The range excludes US outlying territories, which means a PR-born American
* who marries another PR is coded as spouse_native = 0.
generate spouse_native = inrange(spouse_bpl, [redacted], [redacted])
label variable spouse_native "Native-born spouse"


* Non-co-national spouses
* -----------------------

* 1 when a spouse birthplace is recorded and differs from the person's own
generate spouse_notconatl = (spouse_bpl != bpl & spouse_bpl != .)
label variable spouse_notconatl "Non-co-national spouse"


* White native-born spouse
* ------------------------

* Same birthplace range as above (excludes outlying areas) plus a white
* spouse; left missing for people from Western countries
generate spouse_whitenative = (inrange(spouse_bpl, [redacted], [redacted]) & spouse_white == 1) if origin != 1
label variable spouse_whitenative "White native-born spouse"

 
* ---------------------------------------------------------------------------- * 
 
* ---------------------- *
* EDUCATIONAL ATTAINMENT *
* ---------------------- *

* Stata orders missing values above every number, so `qhigh >= x' is true
* when qhigh is missing. Restricting to non-missing qhigh leaves those cases
* missing instead of silently coding them as college-educated.
gen college_some = qhigh >= [redacted] if !missing(qhigh)
label variable college_some "At least some college"

gen college_grad = qhigh >= [redacted] if !missing(qhigh)
label variable college_grad "Graduated college"


* ---------------------------------------------------------------------------- * 

* ------ *
* INCOME *
* ------ *

* Give the two income components analysis names
rename (qincwg qincse) (income_wage income_se)
label variable income_wage "Wage/salary income"
label variable income_se "Self-employment income"

* Row total of the two components (a missing component counts as zero),
* then divided by 10,000
egen income_total = rowtotal(income_wage income_se)
replace income_total = income_total / 10000
label variable income_total "Income"

* ---------------------------------------------------------------------------- * 

* ------------ * 
* UNEMPLOYMENT *
* ------------ *

* 1 for either of the two esr codes below, 0 otherwise
generate unemployed = (esr == [redacted] | esr == [redacted])
label variable unemployed "Unemployed"

* ---------------------------------------------------------------------------- * 

* -------------------- *
* COUNTS BY BIRTHPLACE *
* -------------------- *

* Number of sample records (non-missing pnc) sharing each birthplace;
* this also leaves the data sorted by bpl
bysort bpl: egen bpl_count = count(pnc)
label variable bpl_count "Number of individuals by birthplace"


* ---------------------------------------------------------------------------- * 

* ------------------- *
* BORN IN TERRITORIES *
* ------------------- *

* 0/1 flag for people born in U.S. territories/outlying areas
generate terr_cits = (citizen == [redacted])
label variable terr_cits "Born in U.S. territory"


* ---------------------------------------------------------------------------- * 

* ------------------------- *
* NORTH AMERICAN IMMIGRANTS *
* ------------------------- *

* 0/1 flag for the two birthplace codes below
generate northam_immig = inlist(bpl, [redacted], [redacted])
label variable northam_immig "North American immigrants"

* ---------------------------------------------------------------------------- * 

* --------------------------- *
* BIRTHPLACE TURNED COMMUNIST *
* --------------------------- *

* 1 when the birthplace code is any of the codes listed, 0 otherwise.
* One code per line; the text after /// names the country the code stands for.
generate communist_bpl = inlist(bpl, ///
	[redacted], /// Albania               (states that were actually independent)
	[redacted], /// Bulgaria
	[redacted], /// Czechoslovakia
	[redacted], /// Hungary
	[redacted], /// Poland
	[redacted], /// Romania
	[redacted], /// Soviet Union
	[redacted], /// Yugoslavia
	[redacted], /// Cuba
	[redacted], /// Ghana
	[redacted], /// Mali
	[redacted], /// Senegal
	[redacted], /// Tanzania
	[redacted], /// Zambia
	[redacted], /// China
	[redacted], /// Mongolia
	[redacted], /// North Korea
	[redacted], /// North Vietnam
	[redacted], /// Czechia               (successor states from here on)
	[redacted], /// Slovakia
	[redacted], /// Bosnia
	[redacted], /// Croatia
	[redacted], /// Macedonia
	[redacted], /// Slovenia
	[redacted], /// Serbia and Montenegro
	[redacted], /// Estonia
	[redacted], /// Latvia
	[redacted], /// Lithuania
	[redacted], /// Armenia
	[redacted], /// Azerbaijan
	[redacted], /// Belarus
	[redacted], /// Georgia
	[redacted], /// Moldova
	[redacted], /// Russia
	[redacted], /// Ukraine
	[redacted], /// Kazakhstan
	[redacted], /// Kyrgyzstan
	[redacted], /// Tajikistan
	[redacted], /// Turkmenistan
	[redacted], /// Uzbekistan
	[redacted]  /// Kosovo
	)




* ---------------------------------------------------------------------------- * 

* -------------------------- *
* DEMOGRAPHICS FOR PROFILING *
* -------------------------- *

* Age at the 2000 census in YEARS, with the birth month entering as a
* fraction of a year (e.g. birth_year 1950, birth_month 6 -> 2000 - 1950.5 = 49.5).
* The label used to read "Age in months", which did not match the formula.
gen profileage = 2000 - (birth_year + (birth_month / 12))
label variable profileage "Age in years (monthly precision)"

* 0/1 birthplace indicators for the descriptive profile
gen profilepr = bpl == [redacted]
label variable profilepr "Born in Puerto Rico"

gen profilecanada = bpl == [redacted]
label variable profilecanada "Born in Canada"

gen profilemexico = bpl == [redacted]
label variable profilemexico "Born in Mexico"

gen profileitaly = bpl == [redacted]
label variable profileitaly "Born in Italy"

gen profilegermany = bpl == [redacted]
label variable profilegermany "Born in Germany"

* 0/1 race indicators built from cenrace
gen profilewhite = cenrace == [redacted]
label variable profilewhite "White"

gen profileblack = cenrace == [redacted]
label variable profileblack "Black"

* NOTE(review): this tests the raw spanlong field, whereas the hispanic
* variable is built from spanlong_num; any value other than the code below,
* including a missing one, is counted as Hispanic here -- confirm intended.
gen profilehispanic = spanlong != [redacted]
label variable profilehispanic "Hispanic"

gen profileasian = cenrace == [redacted] | cenrace == [redacted]
label variable profileasian "Asian"

gen profilenotwhite = profileblack == 1 | profileasian == 1
label variable profilenotwhite "Asian or Black"

* ---------------------------------------------------------------------------- * 

* -------------------- *
* DUMMIES FOR XTIVREG2 *
* -------------------- *

* One indicator variable per birth year (byear_1, byear_2, ...) and per
* birth month (bmonth_1, bmonth_2, ...)
tabulate birth_year, generate(byear_)
tabulate birth_month, generate(bmonth_)

* ---------------------------------------------------------------------------- * 

* ---------------------- *
* SLIVERS FOR DISCLOSURE *
* ---------------------- *

* Complement groups of the profile indicators

* neither white, black nor Asian
generate profileothers = !(profilewhite | profileblack | profileasian)

* exclusively-English-speaking origin other than Canada
generate notprofile_engl = (engl_exclusive == 1 & !profilecanada)

* Western origin other than Italy, Germany and Canada
generate notprofile_origin = (origin == 1 & !(profileitaly | profilegermany | profilecanada))

* married, spouse not coded native-born
generate married_nonnative = (married == 1 & !spouse_native)

* married, spouse not coded as non-co-national
generate married_conatl = (married == 1 & !spouse_notconatl)


* ---------------------------------------------------------------------------- * 

* ------------------------------------- *
* CREATE DISC STAT MANAGEMENT VARIABLES *
* ------------------------------------- *

* running record number in the current sort order
generate id = _n
label variable id "Unique person identifier"

* constant 1 on every record
generate all = 1
label variable all "Counter for disclosure stats"

* ---------------------------------------------------------------------------- * 


* ------------ *
* DATA CLEANUP *
* ------------ *

* Create list of variables for data management and analysis.
* Every variable is named in full and listed once: communist_bpl was
* previously written as the abbreviation "communist" (which only resolves
* while variable abbreviation is switched on), and citizen and profileothers
* each appeared twice.
global keepvars	///
 	id all sourcefile bcseq hseq rcseq puid pseq pnc state county tract block blkgrp  /// 
	birth_year birth_month birth_day /// 
	bpl citizen terr_cits northam_immig yr2us yrs_since_immig age_immig hispanic communist_bpl /// 
	target white origin  /// 
	rsn_year rsn apn veteran draft_risk /// 
	res_integrate_tract res_integrate_blkgrp bpl_tract_percent bpl_blkgrp_percent /// 
	married spouse_native spouse_notconatl spouse_whitenative /// 
	engl_exclusive only_engl engl_ability engl_ability_pooled /// 
	naturalized /// 
	college_some college_grad income_wage income_se income_total unemployed /// 
	byear_* bmonth_* /// 
	profileage profilepr profilecanada profilemexico profileitaly profilegermany /// 
	profilewhite profileblack profilehispanic profileasian profilenotwhite profileothers bpl_count ///  
	notprofile_engl notprofile_origin married_nonnative married_conatl 

* Keep only the variables in the list above and order them as above
keep ${keepvars}
order ${keepvars}, first



* ---------------------------------------------------------------------------- * 

* ------------------ *
* DEFINE THE SAMPLES *
* ------------------ *

// sample flags are 1 for members and missing otherwise; enables looping later

* sample lists
* ------------

* every main sample is restricted to the 1949-1952 birth cohorts
local maincohorts inrange(birth_year, 1949, 1952)

* main samples
generate sample_main1 = 1 if `maincohorts'
label variable sample_main1 "Pooled 1949-1952 birth cohorts"

generate sample_main2 = 1 if `maincohorts' & origin == 1
label variable sample_main2 "Western 1949-1952 birth cohorts" 

generate sample_main3 = 1 if `maincohorts' & origin == 0
label variable sample_main3 "Non-Western 1949-1952 birth cohorts"

* ---------------------------------------------------------------------------- * 


* --------------------------- *
* SAVE ANALYSIS-READY DATASET *
* --------------------------- *

* final person-level file used by the analysis code
save "${data}analysis.dta", replace




